1 Load Source File

Data pre-processing is included: special characters and a minimal set of stop-words are removed.

## Attach data.table, then run the shared project script.
library(data.table)
## load review data
## NOTE(review): the sourced file is named libs.R, and the plotting code later
## in this report uses ggplot2 functions and a `reviews.product` table that are
## not created anywhere in this document -- presumably this script attaches the
## remaining packages and builds/loads the review data; confirm.
## NOTE(review): the path is hard-coded under the user's home directory, so the
## report only knits on a machine with this Dropbox layout.
source("~/Dropbox/Eugenie/scripts/libs.R")

2 Time Data vs. Rating

2.1 Plot mean rating over time by brand.bi without splitting incentivized vs. non-incentivized reviews at all

2.1.1 Rating average by day

## Plot by day: daily mean rating over time, one facet row per brand.bi level.
## Both layers read reviews.product and inherit x = date, y = day_rating from
## the top-level aes().
ggplot(NULL, aes(x = date, y = day_rating)) +
  ## Raw daily values as small, semi-transparent points.
  geom_point(
    data = reviews.product,
    aes(color = "daily ave rating"),
    alpha = 0.7, size = 0.2
  ) +
  ## Smoothed trend; method = 'auto' leaves the choice of smoother to ggplot2.
  geom_smooth(
    data = reviews.product, method = 'auto',
    aes(color = 'daily ave rating line'),
    size = 1, alpha = 0.6
  ) +
  theme_minimal() +
  facet_grid(rows = vars(brand.bi)) +
  ## Vertical reference line at 2016-10-03.
  ## NOTE(review): presumably the date separating the two review regimes
  ## discussed in this report -- confirm.
  geom_vline(xintercept = as.Date('2016-10-03'), color = "black", size = 0.3) +
  ## The legend is produced by the colour aesthetic (no layer maps fill), so
  ## its title must be set through `color`; a `fill` label is silently ignored.
  labs(
    x = 'day', y = 'mean rating', color = 'category',
    title = 'Product Category Mean Rating by Day'
  )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

2.1.2 Rating averaged by week

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

2.2 Plot rating distribution by proportion

2.2.1 Rating proportion in a day

2.2.2 Rating proportion in a week

3 Time Data vs. Review Length

3.1 Average review length

3.1.1 Length average by day

## Plot by day: daily mean review length over time, one facet row per brand.bi
## level. Both layers read reviews.product and inherit x = date,
## y = day_length from the top-level aes().
ggplot(NULL, aes(x = date, y = day_length)) +
  ## Raw daily values as small, semi-transparent points.
  geom_point(
    data = reviews.product,
    aes(color = "daily ave length"),
    alpha = 0.7, size = 0.2
  ) +
  ## Smoothed trend; method = 'auto' leaves the choice of smoother to ggplot2.
  geom_smooth(
    data = reviews.product, method = 'auto',
    aes(color = 'daily ave length line'),
    size = 1, alpha = 0.6
  ) +
  theme_minimal() +
  facet_grid(rows = vars(brand.bi)) +
  ## Vertical reference line at 2016-10-03.
  ## NOTE(review): presumably the date separating the two review regimes
  ## discussed in this report -- confirm.
  geom_vline(xintercept = as.Date('2016-10-03'), color = "black", size = 0.3) +
  ## The legend is produced by the colour aesthetic (no layer maps fill), so
  ## its title must be set through `color`; a `fill` label is silently ignored.
  labs(
    x = 'day', y = 'mean length', color = 'category',
    title = 'Product Category Mean Length by Day'
  )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

3.1.2 Length averaged by week

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

3.2 Plot length distribution by proportion

3.2.1 Length proportion in a day

3.2.2 Length proportion in a week

4 Passing vs. Failing Reviews

4.1 Rating average by day

4.1.1 2sd

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

4.1.2 3sd

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

4.2 Rating averaged by week

4.2.1 2sd

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

4.2.2 3sd

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

4.3 Length average by day

4.3.1 2sd

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

4.3.2 3sd

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

4.4 Length averaged by week

4.4.1 2sd

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

4.4.2 3sd

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'